# -*- coding: utf-8 -*-
import lxml.etree
from lxml.cssselect import CSSSelector
# from BeautifulSoup import BeautifulSoup

# Path of the HTML document to inspect, relative to the working directory.
htmlpath = '../htmlfiles/example.html'

# Read raw bytes: the parser below is given an explicit encoding, so it should
# receive undecoded data. The context manager closes the file, so no explicit
# close() call is needed.
with open(htmlpath, 'rb') as f:
    html_bytes = f.read()

# Parse once the file handle has been released.
parser = lxml.etree.HTMLParser(encoding='utf-8')
tree = lxml.etree.fromstring(html_bytes, parser)

# Show the first <h1>: its available attributes/methods and its text.
# Single-argument print(...) produces the same output on Python 2 and 3.
headings = CSSSelector('h1')(tree)
if headings:
    h1 = headings[0]
    print(dir(h1))
    print(h1.text)
else:
    # Previously a document without <h1> aborted with a bare IndexError.
    print('no <h1> element found in %s' % htmlpath)

# Print the leading text of every <p> element (None when a paragraph has no
# text before its first child element).
paragraphs = CSSSelector('p')(tree)
print([_p.text for _p in paragraphs])
